Context

Load

NTHREADS=20
analysis_tag <- '04_velocyto_batch'
library(pheatmap)
library(knitr)
library(scater)
library(Seurat)
library(Cairo)
library(edgeR)
library(data.table)
## library(biomaRt)
library(dplyr)
## library(reshape2)
#library(iSEE)
## library(Matrix)
## library(scran)
## library(mvoutlier)
## library(shiny)
## library(kableExtra)
## library(velocyto.R)
## library(cowplot)
## library(corrplot)
library(DT)
library(devtools)
## library(viridis)
## library("CellMixS")
library(gplots)
library(pheatmap)
library(velocyto.R)
ac <- function(col, alpha = 1) {
    
    apply(sapply(col, col2rgb)/255, 2, function(x) rgb(x[1], 
        x[2], x[3], alpha = alpha))
}

Data load

Provided by Jaeger

so <- readRDS(file.path("..", "data", "Seurat_object_Gli_Ascl_combi_meta.Robj"))

Plus velocyto looms (from task 02_velocyto_mapping)

looms <- list()

for (fn in list.files(file.path("..", "data", "looms"), pattern = ".*loom", 
    recursive = TRUE)) {
    
    ldat <- read.loom.matrices(file.path("..", "data", "looms", 
        fn))
    
    ldat <- lapply(ldat, function(x) {
        colnames(x) <- gsub("Aligned.sortedByCoord.out.bam", 
            "", colnames(x))
        x
    })
    
    ldat <- lapply(ldat, function(x) {
        colnames(x) <- gsub(".*:", "", colnames(x))
        x
    })
    
    looms[[dirname(fn)]] <- ldat
}
reading loom file via hdf5r...
reading loom file via hdf5r...
reading loom file via hdf5r...
reading loom file via hdf5r...
reading loom file via hdf5r...
reading loom file via hdf5r...
reading loom file via hdf5r...

Getting genes that are batch-specific

To be filtered out from velocyto calculations

alt_batch <- as.character(so@meta.data$batch)


alt_batch[alt_batch == "Ascl1_12wk_1"] <- "B1"
alt_batch[alt_batch == "Ascl1_12wk_2"] <- "B2"
alt_batch[alt_batch == "Ascl1_5day_1"] <- "B1"
alt_batch[alt_batch == "Ascl1_5day_2"] <- "B2"
alt_batch[alt_batch == "Gli1_12wk"] <- "B1"
alt_batch[alt_batch == "Gli1_5day_1"] <- "B1"
alt_batch[alt_batch == "Gli1_5day_2"] <- "B1"
alt_batch <- as.factor(alt_batch)
table(alt_batch)
alt_batch
 B1  B2 
642 413 
so@meta.data$alt_batch <- alt_batch


DefaultAssay(so) <- "RNA"
Idents(so) <- "alt_batch"

markers <- FindMarkers(so, ident.1 = "B1", ident.2 = "B2", test.use = "LR", 
    latent.vars = "combi", min.pct = 0.2)
Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
markers <- markers[order(markers$p_val_adj), ]
markers <- markers[markers$p_val_adj < 0.05, ]
markers$analysis <- "batch_relevant_lr"
markers <- data.frame(gene = rownames(markers), markers)

DT::datatable(markers %>% as.data.frame() %>% dplyr::mutate_if(is.numeric, 
    funs(round(., 2))), extensions = c("Buttons", "FixedColumns"), 
    rownames = FALSE, options = list(dom = "Bfrtip", scrollX = TRUE, 
        fixedColumns = list(leftColumns = 1), buttons = c("csv", 
            "excel")))

Subset loom files removing batch-affected genes

Removal of batch-affected genes (simple set differences between count tables and the significantly batch-affected genes)

## selected <- setdiff(markers$gene, VariableFeatures(so))

for (loom in names(looms)) {
    for (item in names(looms[[loom]])) {
        curr <- setdiff(rownames(looms[[loom]][[item]]), markers$gene)
        looms[[loom]][[item]] <- looms[[loom]][[item]][curr, 
            ]
    }
}

Pasting loom files together (assumes equivalent gene sorting)

ldat <- list()

ldat$spliced <- do.call(cbind.data.frame, sapply(looms, function(x) return(x$spliced)))
ldat$unspliced <- do.call(cbind.data.frame, sapply(looms, function(x) return(x$unspliced)))
ldat$spanning <- do.call(cbind.data.frame, sapply(looms, function(x) return(x$spanning)))

for (item in names(ldat)) {
    colnames(ldat[[item]]) <- sapply(strsplit(colnames(ldat[[item]]), 
        ".", fixed = TRUE), function(x) return(x[2]))
}

Making sure we don’t have the batch-affected genes.

stopifnot(length(intersect(as.character(markers$gene), rownames(ldat$spliced))) == 
    0)

Remove cells that are not in use (Seurat’s object contains the QC-ed cells only).

for (item in names(ldat)) {
    ldat[[item]] <- ldat[[item]][, intersect(colnames(ldat[[item]]), 
        colnames(so))]
}

Coloring cells according to the combi Jaeger’s naming that includes the genotype and the cell cluster

cell_colors <- so@meta.data["combi"]

Distances as in Seurat’s PCA

cell_dist <- as.dist(1 - armaCor(t(Embeddings(so, reduction = "pca"))))
# exonic read (spliced) expression matrix
emat <- ldat$spliced
# intronic read (unspliced) expression matrix
nmat <- ldat$unspliced
# spanning read (intron+exon) expression matrix
smat <- ldat$spanning

summary(apply(emat, 1, mean))
    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
   0.000    0.000    0.000    2.753    0.102 4857.934 
summary(apply(nmat, 1, mean))
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
   0.0000    0.0000    0.0000    0.5442    0.0154 1339.1415 
summary(apply(smat, 1, mean))
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
  0.00000   0.00000   0.00000   0.11440   0.00103 270.48615 
table(rownames(cell_colors) %in% colnames(emat))

FALSE  TRUE 
   80   975 
## cell_colors <- data.frame(cell_colors[rownames(cell_colors)
## %in% colnames(emat),], row.names =
## rownames(cell_colors)[rownames(cell_colors) %in%
## colnames(emat)])

cell_colors$cell <- rownames(cell_colors)

cell_colors$color <- rainbow(8)[as.numeric(as.factor(cell_colors$combi))]

cnames <- cell_colors$cell

cell_colors <- cell_colors$color
names(cell_colors) <- cnames

Please mind and/or finetune the min max cluster average values

## filter expression matrices based on some minimum
## max-cluster averages
emat <- filter.genes.by.cluster.expression(emat, cell_colors, 
    min.max.cluster.average = 0.2)
nmat <- filter.genes.by.cluster.expression(nmat, cell_colors, 
    min.max.cluster.average = 0.05)
smat <- filter.genes.by.cluster.expression(smat, cell_colors, 
    min.max.cluster.average = 0.01)
# look at the resulting gene set
length(intersect(rownames(emat), rownames(nmat)))
[1] 11003

Estimating velocities, note the k parameter

set.seed(2)
fit.quantile <- 0.02
rvel.qf <- gene.relative.velocity.estimates(as.matrix(emat), 
    as.matrix(nmat), deltaT = 1, kCells = 25, cell.dist = cell_dist, 
    n.cores = NTHREADS, fit.quantile = fit.quantile)
Warning in labels(cell.dist) == colnames(emat): longer object length is not
a multiple of shorter object length
matching cells between cell.dist and emat/nmat ... done
calculating cell knn ... done
calculating convolved matrices ... done
fitting gamma coefficients ... done. succesfful fit for 11003 genes
filtered out 2747 out of 11003 genes due to low nmat-emat correlation
filtered out 2698 out of 8256 genes due to low nmat-emat slope
calculating RNA velocity shift ... done
calculating extrapolated cell state ... done
pca.velocity.plot(rvel.qf, nPcs = 5, plot.cols = 2, cell.colors = ac(cell_colors, 
    alpha = 0.7), cex = 1.2, pcount = 0.1, pc.multipliers = c(1, 
    -1, -1, -1, -1))
log ... pca ... pc multipliers ... delta norm ... done

done

Notice the number of neighbors (n= 100 etc)

umap_coord <- Embeddings(so, reduction = "umap")

show.velocity.on.embedding.cor(umap_coord, rvel.qf, n = 100, 
    scale = "sqrt", cell.colors = ac(cell_colors, alpha = 0.5), 
    cex = 1, arrow.scale = 4, arrow.lwd = 1, do.par = TRUE, cell.border.alpha = 0.1)

delta projections ... sqrt knn ... transition probs ... done
calculating arrows ... done
umap_coord_ascl = umap_coord[grep("Ascl", rownames(umap_coord)), 
    ]
umap_coord_gli = umap_coord[!rownames(umap_coord) %in% rownames(umap_coord_ascl), 
    ]
umap_coord_A_5d_12w_2 = umap_coord[grep("Ascl1_12wk_2|II", rownames(umap_coord)), 
    ]

Velocities on subsets of cells

Ascl

## Velocity for Ascl and Gli only
show.velocity.on.embedding.cor(umap_coord_ascl, rvel.qf, n = 100, 
    scale = "sqrt", cell.colors = ac(cell_colors, alpha = 0.5), 
    cex = 1, arrow.scale = 4, arrow.lwd = 1, do.par = T, cell.border.alpha = 0.1)

delta projections ... sqrt knn ... transition probs ... done
calculating arrows ... done

A_5d_12w_2

show.velocity.on.embedding.cor(umap_coord_A_5d_12w_2, rvel.qf, 
    n = 100, scale = "sqrt", cell.colors = ac(cell_colors, alpha = 0.5), 
    cex = 1, arrow.scale = 4, arrow.lwd = 1, do.par = T, cell.border.alpha = 0.1)

delta projections ... sqrt knn ... transition probs ... done
calculating arrows ... done

Gli

show.velocity.on.embedding.cor(umap_coord_gli, rvel.qf, n = 100, 
    scale = "sqrt", cell.colors = ac(cell_colors, alpha = 0.5), 
    cex = 1, arrow.scale = 4, arrow.lwd = 1, do.par = T, cell.border.alpha = 0.1)

delta projections ... sqrt knn ... transition probs ... done
calculating arrows ... done

Shall we increase the number of neighbors while plotting? Example with 300 neighbors.

show.velocity.on.embedding.cor(umap_coord, rvel.qf, n = 300, 
    scale = "sqrt", cell.colors = ac(cell_colors, alpha = 0.5), 
    cex = 1, arrow.scale = 4, arrow.lwd = 1, do.par = TRUE, cell.border.alpha = 0.1)

delta projections ... sqrt knn ... transition probs ... done
calculating arrows ... done

Save an image with all objects

save.image("04_velocyto_batch_removal.RData")

Session

date()
[1] "Wed Aug 28 13:14:16 2019"
devtools::session_info()
Session info -------------------------------------------------------------
 setting  value                       
 version  R version 3.5.1 (2018-07-02)
 system   x86_64, linux-gnu           
 ui       X11                         
 language en_CA:en                    
 collate  en_CA.UTF-8                 
 tz       Europe/Zurich               
 date     2019-08-28                  
Packages -----------------------------------------------------------------
 package              * version   date      
 ape                    5.2       2018-09-24
 assertthat             0.2.0     2017-04-11
 backports              1.1.2     2017-12-13
 base                 * 3.5.1     2018-07-09
 beeswarm               0.2.3     2016-04-25
 bindr                  0.1.1     2018-03-13
 bindrcpp             * 0.2.2     2018-03-29
 Biobase              * 2.40.0    2018-07-09
 BiocGenerics         * 0.26.0    2018-07-09
 BiocParallel         * 1.14.1    2018-07-09
 bit                    1.1-14    2018-05-29
 bit64                  0.9-7     2017-05-08
 bitops                 1.0-6     2013-08-17
 Cairo                * 1.5-9     2015-09-26
 caTools                1.17.1.1  2018-07-20
 cluster                2.0.7-1   2018-04-13
 codetools              0.2-15    2016-10-05
 colorspace             1.3-2     2016-12-14
 compiler               3.5.1     2018-07-09
 cowplot                0.9.4     2019-01-08
 crayon                 1.3.4     2017-09-16
 crosstalk              1.0.0     2016-12-21
 data.table           * 1.11.4    2018-05-27
 datasets             * 3.5.1     2018-07-09
 DelayedArray         * 0.6.1     2018-06-15
 DelayedMatrixStats     1.2.0     2018-10-19
 devtools             * 1.13.6    2018-06-27
 digest                 0.6.18    2018-10-10
 dplyr                * 0.7.8     2018-11-10
 DT                   * 0.4       2018-01-30
 edgeR                * 3.22.3    2018-06-21
 evaluate               0.10.1    2017-06-24
 fitdistrplus           1.0-11    2018-09-10
 formatR                1.5       2017-04-25
 future                 1.14.0    2019-07-02
 future.apply           1.3.0     2019-06-18
 gdata                  2.18.0    2017-06-06
 GenomeInfoDb         * 1.16.0    2018-07-09
 GenomeInfoDbData       1.1.0     2018-07-09
 GenomicRanges        * 1.32.7    2018-09-20
 ggbeeswarm             0.6.0     2017-08-07
 ggplot2              * 3.1.0     2018-10-25
 ggrepel                0.8.0     2018-05-09
 ggridges               0.5.1     2018-09-27
 globals                0.12.4    2018-10-11
 glue                   1.3.0     2018-07-17
 gplots               * 3.0.1     2016-03-30
 graphics             * 3.5.1     2018-07-09
 grDevices            * 3.5.1     2018-07-09
 grid                   3.5.1     2018-07-09
 gridExtra              2.3       2017-09-09
 gtable                 0.2.0     2016-02-26
 gtools                 3.8.1     2018-06-26
 hdf5r                  1.2.0     2019-04-20
 htmltools              0.3.6     2017-04-28
 htmlwidgets            1.3       2018-09-30
 httpuv                 1.4.4.2   2018-07-02
 httr                   1.3.1     2017-08-20
 ica                    1.0-2     2018-05-24
 igraph                 1.2.4.1   2019-04-22
 IRanges              * 2.14.12   2018-09-20
 irlba                  2.3.2     2018-01-11
 jsonlite               1.5       2017-06-01
 KernSmooth             2.23-15   2015-06-29
 knitr                * 1.22      2019-03-08
 later                  0.7.3     2018-06-08
 lattice                0.20-35   2017-03-25
 lazyeval               0.2.1     2017-10-29
 leiden                 0.3.1     2019-07-23
 limma                * 3.36.2    2018-06-21
 listenv                0.7.0     2018-01-21
 lmtest                 0.9-36    2018-04-04
 locfit                 1.5-9.1   2013-04-20
 lsei                   1.2-0     2017-10-23
 magrittr               1.5       2014-11-22
 MASS                   7.3-50    2018-04-30
 Matrix               * 1.2-14    2018-04-09
 matrixStats          * 0.54.0    2018-07-23
 memoise                1.1.0     2017-04-21
 metap                  0.9       2018-04-25
 methods              * 3.5.1     2018-07-09
 mgcv                   1.8-24    2018-06-23
 mime                   0.5       2016-07-07
 munsell                0.5.0     2018-06-12
 nlme                   3.1-137   2018-04-07
 npsurv                 0.4-0     2017-10-14
 parallel             * 3.5.1     2018-07-09
 pbapply                1.3-4     2018-01-10
 pcaMethods             1.72.0    2018-07-13
 pheatmap             * 1.0.12    2019-01-04
 pillar                 1.3.1     2018-12-15
 pkgconfig              2.0.2     2018-08-16
 plotly                 4.8.0     2018-07-20
 plyr                   1.8.4     2016-06-08
 png                    0.1-7     2013-12-03
 promises               1.0.1     2018-04-13
 purrr                  0.3.0     2019-01-27
 R.methodsS3            1.7.1     2016-02-16
 R.oo                   1.22.0    2018-04-22
 R.utils                2.7.0     2018-08-27
 R6                     2.4.0     2019-02-14
 RANN                   2.6       2018-07-16
 RColorBrewer           1.1-2     2014-12-07
 Rcpp                   1.0.2     2019-07-25
 RcppAnnoy              0.0.12    2019-05-12
 RcppParallel           4.4.3     2019-05-22
 RCurl                  1.95-4.10 2018-01-04
 reshape2               1.4.3     2017-12-11
 reticulate             1.10      2018-08-05
 rhdf5                  2.24.0    2018-10-19
 Rhdf5lib               1.2.1     2018-10-19
 rjson                  0.2.20    2018-06-08
 rlang                  0.3.1     2019-01-08
 rmarkdown              1.10      2018-06-11
 ROCR                   1.0-7     2015-03-26
 rprojroot              1.3-2     2018-01-03
 rstudioapi             0.7       2017-09-07
 rsvd                   1.0.2     2019-07-29
 Rtsne                  0.13      2017-04-14
 S4Vectors            * 0.18.3    2018-07-09
 scales                 1.0.0     2018-08-09
 scater               * 1.8.4     2018-08-13
 sctransform            0.2.0     2019-04-12
 SDMTools               1.1-221   2014-08-05
 Seurat               * 3.1.0     2019-08-20
 shiny                  1.1.0     2018-05-17
 shinydashboard         0.7.1     2018-10-17
 SingleCellExperiment * 1.2.0     2018-10-19
 splines                3.5.1     2018-07-09
 stats                * 3.5.1     2018-07-09
 stats4               * 3.5.1     2018-07-09
 stringi                1.2.3     2018-06-12
 stringr                1.4.0     2019-02-10
 SummarizedExperiment * 1.10.1    2018-07-09
 survival               2.42-4    2018-06-30
 tibble                 2.0.1     2019-01-12
 tidyr                  0.8.2     2018-10-28
 tidyselect             0.2.5     2018-10-11
 tools                  3.5.1     2018-07-09
 tsne                   0.1-3     2016-07-15
 tximport               1.8.0     2018-10-19
 utils                * 3.5.1     2018-07-09
 uwot                   0.1.3     2019-04-07
 velocyto.R           * 0.6       2019-08-21
 vipor                  0.4.5     2017-03-22
 viridis                0.5.1     2018-03-29
 viridisLite            0.3.0     2018-02-01
 withr                  2.1.2     2018-03-15
 xfun                   0.3       2018-07-06
 xtable                 1.8-3     2018-08-29
 XVector                0.20.0    2018-07-09
 yaml                   2.1.19    2018-05-01
 zlibbioc               1.26.0    2018-07-09
 zoo                    1.8-4     2018-09-19
 source                                   
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.0)                           
 CRAN (R 3.5.1)                           
 local                                    
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 Bioconductor                             
 Bioconductor                             
 Bioconductor                             
 CRAN (R 3.5.0)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 local                                    
 cran (@0.9.4)                            
 CRAN (R 3.5.0)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 local                                    
 Bioconductor                             
 Bioconductor                             
 CRAN (R 3.5.1)                           
 cran (@0.6.18)                           
 cran (@0.7.8)                            
 CRAN (R 3.5.1)                           
 Bioconductor                             
 CRAN (R 3.5.0)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.0)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 Bioconductor                             
 Bioconductor                             
 cran (@1.32.7)                           
 CRAN (R 3.5.1)                           
 cran (@3.1.0)                            
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 cran (@1.3.0)                            
 CRAN (R 3.5.1)                           
 local                                    
 local                                    
 local                                    
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 cran (@1.2.0)                            
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.0)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.0)                           
 cran (@1.2.4.1)                          
 cran (@2.14.12)                          
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.0)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 Bioconductor                             
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.0)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.0)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.0)                           
 local                                    
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.0)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 local                                    
 CRAN (R 3.5.1)                           
 Bioconductor                             
 cran (@1.0.12)                           
 cran (@1.3.1)                            
 cran (@2.0.2)                            
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.0)                           
 cran (@0.3.0)                            
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.0)                           
 CRAN (R 3.5.1)                           
 cran (@2.4.0)                            
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 cran (@1.0.2)                            
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 Bioconductor                             
 Bioconductor                             
 CRAN (R 3.5.0)                           
 cran (@0.3.1)                            
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 Bioconductor                             
 cran (@1.0.0)                            
 Bioconductor                             
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 Bioconductor                             
 local                                    
 local                                    
 local                                    
 CRAN (R 3.5.0)                           
 CRAN (R 3.5.1)                           
 Bioconductor                             
 CRAN (R 3.5.1)                           
 cran (@2.0.1)                            
 cran (@0.8.2)                            
 cran (@0.2.5)                            
 local                                    
 CRAN (R 3.5.1)                           
 Bioconductor                             
 local                                    
 CRAN (R 3.5.1)                           
 Github (velocyto-team/velocyto.R@d779034)
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.1)                           
 CRAN (R 3.5.0)                           
 CRAN (R 3.5.0)                           
 CRAN (R 3.5.1)                           
 Bioconductor                             
 CRAN (R 3.5.0)                           
 Bioconductor                             
 CRAN (R 3.5.1)